# Produce results tables and figs
pacman::p_load(tidyverse, cowplot, viridis, ggplot2, lfe, kableExtra, fixest, sf, scales, lubridate, tinytex, marginaleffects, modelsummary, fst, qs, MatchIt, patchwork)

# Make sure modelsummary uses kableExtra and not tinytable
options(modelsummary_factory_latex_tabular = 'kableExtra')

# SETUP ----

rm(list = ls())

source("code/globals.R")

data <- qread(file.path(WORKING, "analysisdataset.qs"))

# Generate a variable for built before or after code
data <- data %>% mutate(afterCode = as.numeric(period %in% c(1, 2)))

# Transform wildfire hazard to %
data <- data %>% mutate(wildfire_hazard_pct = wildfire_hazard * 100)

# Squished version of year built
data <- data %>% mutate(combinedyear_squished = oob_squish_any(combinedyear, c(1945, 2010)))

# Two samples: main (no CA controls) and CA (only CA controls)
regs <- data %>% filter(sample_main)
regsCA <- data %>% filter(sample_CA)

regs %>% count(treatmentgroup, regime)
regsCA %>% count(treatmentgroup, regime)

regs %>% count(treatmentgroup, period)
regsCA %>% count(treatmentgroup, period)

# Compare # of controls from CA and not-CA by vintage (period)
just_controls <- regs %>% filter(treatmentgroup == "No-codes") %>% transmute(period, controlsource = "Non-CA Controls") %>%
  bind_rows(regsCA %>% filter(treatmentgroup == "No-codes") %>% transmute(period, controlsource = "CA Controls"))

just_controls %>% count(controlsource, period)

# Get % of homes geocoded with building footprints (reported in paper)
regs %>% count(loc_source) %>% mutate(p = n / sum(n))

# Get average percent destroyed by regime
regs %>% group_by(regime, period) %>% summarize(sharelost = mean(destroyed))



# EMPIRICAL ANALYSES -----

## Scatterplot of raw means, SRA -----


srameans <- regs %>% filter(regime == "sra") %>%
  group_by(combinedyear_squished) %>%
  summarize(
    sharelost = mean(destroyed),
    sqfeet = mean(sqfeet, na.rm = TRUE),
    slope = mean(slope, na.rm = TRUE),
    elev = mean(elev, na.rm = T),
    numobs = n()
  )

# Function to make scatterplots of raw means (both share lost and placebo tests of covariates)
plot_scatter <- function(combinedyear, covar, numobs = NA, name, ybottom = 0.5, ytop = 0.1, smoothlines = F) {
  df <- data.frame(combinedyear = combinedyear, covar = covar, numobs = numobs)
  
  # Set up the plot
  p <- ggplot(df, aes(x = combinedyear)) +
    annotate("rect", xmin = 1991, xmax = 1998, ymin = ybottom, ymax = ytop, alpha = .1, fill = "grey") + 
    geom_vline(
      xintercept = c(1991, 1993, 1995, 1997), alpha = 0.5,
      color = c("red", rep("black", 3)), linetype = "dashed"
    ) +
    annotate("text",
             x = c(1991, 1993, 1995, 1997), y = ytop - .1 * (ytop - ybottom),
             label = c("Tunnel Fire", "Bates Bill", "Class B/C Roof", "Class A/B Roof"),
             color = c("red", rep("black", 3)),
             hjust = 1, angle = 85, fontface = "italic"
    )
  
  # Add smoothing lines for 10 years before / after legislation if requested
  if (smoothlines) {
    p <- p + 
      geom_smooth(
        data = df %>% filter(between(combinedyear, 1981, 1991)),
        aes(x = combinedyear, y = covar, weight = numobs),
        method = lm, formula = y ~ 1, se = F, color = "blue", linewidth = 0.75, alpha = 0.5
      ) +
      geom_smooth(
        data = df %>% filter(between(combinedyear, 1998, 2008)),
        aes(x = combinedyear, y = covar, weight = numobs),
        method = lm, formula = y ~ 1, se = F, color = "blue", linewidth = 0.75, alpha = 0.5
      )
  }
  
  p <- p + 
    geom_point(aes(y = covar), color = "black", size = 3) +
    scale_x_continuous(breaks = seq(1940, 2020, 10), name = "Effective Year Built") +
    scale_y_continuous(name = name) +
    globalplotoptions
  
  p
}

p = plot_scatter(srameans$combinedyear_squished, 
                 srameans$sharelost, numobs = srameans$numobs, name = "P(Destroyed)", 
             ybottom = 0, ytop = 0.5, smoothlines = T)

save_plot(file.path(RES,"sra_raw.pdf"), plot = p, base_asp=1, base_height=6)


## Scatter of placebo covars, SRA -----

# Plot structure square footage
p_sqft = plot_scatter(srameans$combinedyear_squished, srameans$sqfeet, numobs = NA,
             name = "Square Footage", 
             ybottom = 0, 
             ytop = max(srameans$sqfeet, na.rm = TRUE) + 500)

save_plot(file.path(RES, "placebo_sqft.pdf"), plot = p_sqft, base_asp=1,base_height=6)


# Plot ground slope around home
p_slope = plot_scatter(srameans$combinedyear_squished, srameans$slope, numobs = NA,
             name = "Ground Slope", 
             ybottom = min(srameans$slope, na.rm = TRUE) - 1, 
             ytop = max(srameans$slope, na.rm = TRUE) + 1)

save_plot(file.path(RES, "placebo_slope.pdf"), plot = p_slope, base_asp = 1, base_height = 6)

## Event study figures -----

#### Function to make event study plots (many options)

eventstudy2 <- function(this_data, 
                        binvar = "twoyearbin",
                        ref = "1987",
                        ybottom = -0.35, ytop = 0.25, policylabels = T, xaxis = T, histogrambelow = T) {
  
  # this_data <- regs %>% filter(regime %in% "sra")
  # binvar = "twoyearbin"
  # sample = c("lraNN")
  # ref = "1987"
  # binvar = "tenyearbin"
  # ref = "1979"

  # Get the requested bin variable
  this_data <- this_data %>%
    mutate(yearbin = get(binvar)) # Get binvar
  
  this_data <- this_data %>%
    filter(complete.cases(destroyed, combinedyear, streetXfire, street25Xfire, incidentid)) %>%
    select(ImportParcelID, BuildingOrImprovementNumber, incidentid, regime, period, treatmentgroup, destroyed, combinedyear, yearbin, streetXfire, street25Xfire, slope, elev, wildfire_hazard_pct, fuelmodel, logLotSize, sqfeet1000, TotalBedrooms)
  
  # Create an event study plot
  # fmla <- as.formula(sprintf("destroyed ~ i(yearbin, ref = %s) | streetXfire", ref)) # Old version -- not controlling for slope, etc.
  fmla_controls <- as.formula(sprintf("destroyed ~ i(yearbin, ref = %s) + slope | fuelmodel + street25Xfire", ref))
  fit <- feols(fmla_controls, data = this_data) %>% summary(cluster = ~incidentid)

  # Get all the coefficient estimates
  fit_df <- coefplot(fit, ref = paste0("yearbin::", ref), keep = "yearbin")$prms
  
  # Compute a year variable to order correctly, then recompute x
  fit_df <- fit_df %>%
    mutate(year = as.numeric(str_extract(estimate_names, "\\d+")))
  
  # Guess binwidth by computing mode of differences
  binwidth <- Mode(lead(fit_df$year) - fit_df$year)
  
  fit_df <- fit_df %>%
    mutate(year = case_when( # -2 if there's a "<"
      grepl("<", estimate_names_raw, fixed = T) ~ year - binwidth,
      TRUE ~ year
    )) %>% 
    arrange(year)
  
  
  p <- ggplot(fit_df, aes(x = year + 0.5 * binwidth))
  
  if (policylabels == TRUE) {
    yearlines <- c(1991, 1993, 1995, 1997, 2007)
    labcolors <- c("red", "black", "black", "black", "black")
    p <- p + # annotate("rect", xmin = "1991", xmax = "1997",ymin=ybottom,ymax=ytop,alpha = .1,fill = "grey")+
      geom_vline(
        xintercept = yearlines, 
        color = labcolors,
        alpha = 0.5, linetype = "dashed"
      ) +
      annotate("text",
               x = yearlines, y = ytop, # ytop-.01*(ytop-ybottom
               label =c("Tunnel Fire", "Bates Bill", "Class B/C Roof", "Class A/B Roof", "Chapter 7A"),
               color = labcolors,
               hjust = 1, vjust = 0, angle = 90, fontface = "italic"
      )
  }
  
  # Add rest of plot
  p <- p + 
    geom_ribbon(aes(ymax = ci_high, ymin = ci_low, group = TRUE), color = NA, fill = "gray", alpha = 0.25) +
    geom_line(aes(y = estimate, group = TRUE), color = "red") +
    geom_point(aes(y = estimate), color = "red", size = 1.25) +
    geom_hline(yintercept = 0, color = "black") +
    scale_y_continuous(name = "P(Destroyed) Relative to 1987", limits = c(ybottom, ytop), breaks = c(-0.3, -0.2, -0.1, 0, 0.1, 0.2)) +
    scale_x_continuous(name = "Effective Year Built of Home", 
                       limits = c(1949, 2019),
                       breaks = seq(1950, 2010, 10), labels = seq(1950, 2010, 10)) + 
    globalplotoptions
  
  # Remove x axis if desired
  if (xaxis == FALSE) {
    p <- p + theme(
      axis.text.x = element_blank(),
      axis.title.x = element_blank(),
      axis.ticks.x = element_blank(),
      plot.margin = margin(0, 5.5, 24, 5.5, unit = "pt")
    ) # bottom margin on the top plot controls spacing between the two gridded plots)
  } else {
    plot.margin = margin(5.5, 5.5, 5.5, 5.5, unit = "pt")
  }
  
  # Return a version with a histogram if desired
  if (histogrambelow == TRUE) {
    bin_counts <- this_data %>% count(yearbin) %>%
      mutate(year = as.numeric(str_extract(yearbin, "\\d+"))) %>%
      mutate(year = case_when(
        grepl("<", yearbin, fixed = T) ~ year - binwidth,
        TRUE ~ year
      ))
    
    fit_df <- fit_df %>%
      left_join(bin_counts, by = "year")
    
    
    h <- ggplot(fit_df, aes(x = year + binwidth * 0.5, y = n * 0.1)) + # The number is a scaling factor
      geom_col(width = binwidth, color = "white") + 
      lims(y = c(0, max(bin_counts$n))) + 
      scale_x_continuous(name = "Effective Year Built of Home",
                         limits = c(1949, 2019),
                         breaks = seq(1950, 2010, 10), labels = seq(1950, 2010, 10)) + 
      theme_nothing()

    aligned_plots <- cowplot::align_plots(p, h, align="hv", axis="tblr")
    return(ggdraw(aligned_plots[[1]]) + draw_plot(aligned_plots[[2]]))
  } else {
    return(p)
  }
}

p1 <- eventstudy2(regs %>% filter(regime %in% "sra"))
save_plot(file.path(RES, "es_sra.pdf"), plot = p1, base_asp = 1.25)

p2 <- eventstudy2(regs %>% filter(regime %in% c("lraYY", "lraYN", "lraNY")))
save_plot(file.path(RES, "es_lraVH.pdf"), plot = p2, base_asp = 1.25)

p3 <- eventstudy2(regs %>% filter(regime %in% c("otherstates")),
                  binvar = "tenyearbin", ref = "1979",
                  policylabels = F)
save_plot(file.path(RES, "es_otherstates.pdf"), plot = p3, base_asp = 1.25)

p4 <- eventstudy2(data %>% filter(regime %in% c("lraNN")),
                  binvar = "tenyearbin", ref = "1979",
                  policylabels = F)
# Drop between 1979-1988 and 1989-1998 in this estimate is consistent with reports of inconsistent enforcement of local building codes

## Combined event study in 3 panels (for paper)
c1 <- eventstudy2(regs %>% filter(regime %in% "sra"), xaxis = F) +
  theme(plot.title = element_text(hjust = 0.5)) +
  ggtitle("SRA (Codes Mandatory)")
c2 <- eventstudy2(regs %>% filter(regime %in% c("lraYY", "lraYN", "lraNY")), xaxis = F) +
  theme(plot.title = element_text(hjust = 0.5)) +
  ggtitle("LRA VHFHSZ (Codes Recommended)")
c3 <- eventstudy2(regs %>% filter(regime %in% c("otherstates")),
                  binvar = "tenyearbin", ref = "1979",
                  policylabels = F) + 
  theme(plot.title = element_text(hjust = 0.5)) +
                    ggtitle("No Mandatory or Recommended Codes")

save_plot(filename = file.path(RES, "combinedeventstudies.pdf"), 
          plot = plot_grid(c1, c2, c3, ncol = 1, align = "v", axis = "tblr"), base_height = 10, base_aspect_ratio = 0.62)




## MAIN REGRESSIONS -----

### Table: Main results -----

# Set up estimation formulas
fmla_inc <- as.formula("destroyed ~ i(period, treatmentgroup, ref = 0) + slope + i(fuelmodel) | incidentid^treatmentgroup")
fmla_street <- as.formula("destroyed ~ i(period, treatmentgroup, ref = 0) + slope + i(fuelmodel) | streetXfire^treatmentgroup")
fmla_substreet1 <- as.formula("destroyed ~ i(period, treatmentgroup, ref = 0) + slope + i(fuelmodel) | street25Xfire^treatmentgroup")
fmla_substreet2 <- as.formula("destroyed ~ i(period, treatmentgroup, ref = 0) + slope + i(fuelmodel) + elev + wildfire_hazard_pct + LotSizeAcres + sqfeet1000 + TotalBedrooms | street25Xfire^treatmentgroup")


mainfits <- list()
mainfits$inc <- feols(fmla_inc, data = regs)
mainfits$street <- feols(fmla_street, data = regs)
mainfits$substreet1 <- feols(fmla_substreet1, data = regs)
mainfits$substreet2 <- feols(fmla_substreet2, data = regs) 


etable(mainfits,
  cluster = "incidentid",
  headers = list(":_:" = list("Incident FE" = 1, "Street FE" = 3)),
  group = list(
    "\\\\ Fuel model indicators" = c("Fuel"),
    "Additional controls" = c("Elevation", "Wildfire hazard", "Square feet", "Bedrooms", "Lot size", "log")
  ),
  tex = T
) %>%
  writeLines(file.path(RES, "mainregs.tex"))

etable(mainfits,
  cluster = "incidentid",
  headers = list(":_:" = list("Incident FE" = 1, "Street FE" = 3)),
  tex = T) %>%
  writeLines(file.path(RES, "mainregs_allcoefs.tex"))


## Sensitivity checks on main results -----

# Since we have a lot more sensitivity checks due to revision, we split into two tables: sensitivity to subset selection, sensitivity to specification

### Appx Table: Sensitivity to estimating sample ------

fits_sample <- list()


#### Within-CA Control group

fits_sample[["CA\nControls"]] <- feols(fmla_substreet1, data = regsCA) %>% summary(cluster = ~incidentid)

#### Combined control group

fits_sample[["Combined\nControls"]] <- feols(fmla_substreet1, data = data %>% filter(regime %in% c("sra", "lraNY", "lraNY", "lraYY", "lraNN", "otherstates"))) %>% summary(cluster = ~incidentid)

##### Balanced regression
fits_sample[["Balanced Panel"]] <- feols(fmla_substreet1,
  data = regs %>% filter(fireyear >= 2015 & combinedyear <= 2014)
) %>% summary(cluster = ~incidentid)

##### Only homes that are not involved in multiple incidents
fits_sample[["Single\nIncident"]] <- feols(fmla_substreet1, data = regs %>% filter(multiple_incidents == FALSE)) %>% summary(cluster = ~incidentid)

##### Only homes at least 50m from the perimeter
fits_sample[["50m+\nFrom\nPerimeter"]] <- feols(fmla_substreet1, data = regs %>% filter(dist_to_perim < -50)) %>% summary(cluster = ~incidentid)

#### Using only fires that put 1000+ homes at risk.
fits_sample[["1000+\nHomes\nAt Risk"]] <- feols(fmla_substreet1, data = regs %>% filter(inc_atrisk_gt1000 == TRUE)) %>% summary(cluster = ~incidentid)

#### Resource constrained fires

fits_sample[["Limited\nResources"]] <- feols(fmla_substreet1, data = regs %>% filter(inc_atrisk_gt1000 == TRUE, inc_constrained == TRUE)) %>% summary(cluster = ~incidentid)

etable(fits_sample,
  group = list("\\\\ Fuel model indicators" = c("Fuel")),
  headers = list("^" = names(fits_sample)),
  tex = T
) %>% 
  writeLines(file.path(RES, "appxregs-samples.tex"))



### Appx Table: Sensitivity to specification choice -----

fits_specs <- list()

##### Drop covars
fits_specs[["No\ncontrols"]] <- feols(destroyed ~ i(period, treatmentgroup, ref = 0) | street25Xfire^treatmentgroup, data = regs) %>% summary(cluster = ~incidentid)

##### Street X Side FEs
fits_specs[["Street\nSide"]] <- feols(destroyed ~ i(period, treatmentgroup, ref = 0) + slope + i(fuelmodel) | street25XfireXside^treatmentgroup, data = regs) %>% summary(cluster = ~incidentid)

##### Matching regression
# Update from original matching regression, now uses a Pscore matching approach with more covariates

match_data <- data %>%
  mutate(treat = treatmentgroup != "No-codes") %>%
  mutate(roundyear = 5 * floor(combinedyear / 5)) %>% # Use year rounded to 5 for very early years in data
  mutate(combinedyear = case_when(
    is.na(combinedyear) | combinedyear < 1900 ~ 1900,
    combinedyear < 1935 & combinedyear >= 1900 ~ roundyear,
    combinedyear >= 1935 ~ combinedyear
  )) %>%
  select(-roundyear) %>%
  filter(complete.cases(treat, combinedyear, slope, fuelmodel, LotSizeAcres, sqfeet1000, TotalBedrooms, elev, wildfire_hazard_pct)) # Ensure all matching variables are present

# Implement nearest neighbor matching using vintage and col (3) control variables
# Takes a bit of time to run
match_out <- matchit(treat ~ combinedyear + slope + i(fuelmodel),
  data = match_data,
  method = "nearest", distance = "glm", link = "probit",
  replace = T
)
summary(match_out)

match_out_data <- match.data(match_out)

fits_specs[["Matching"]] <- feols(fmla_substreet1, data = match_out_data, weights = ~weights) %>% summary(cluster = ~incidentid)

### Traditional DID (replaces the full table version we originally developed)

fits_specs[["DID"]] <- feols(as.formula("destroyed ~ i(period, ref = 0) + i(treatmentgroup, period, ref = \"No-codes\", ref2 = 0) + slope + i(fuelmodel) | street25Xfire^treatmentgroup"), 
                             data = regs) %>% summary(cluster = ~incidentid)

#### Logit

# Create interactions manually for ease w/ modelsummary
regs <- regs %>%
  mutate(
    period1XNoCodes = period == 1 & treatmentgroup == "No-codes",
    period1XLRA = period == 1 & treatmentgroup == "LRA-VHFHSZ",
    period1XSRA = period == 1 & treatmentgroup == "SRA",
    period2XNoCodes = period == 2 & treatmentgroup == "No-codes",
    period2XLRA = period == 2 & treatmentgroup == "LRA-VHFHSZ",
    period2XSRA = period == 2 & treatmentgroup == "SRA"
  )

logitfits <- list()
logitfits$lpm_str1 <- feols(destroyed ~ period1XNoCodes + period1XLRA + period1XSRA + period2XNoCodes + period2XLRA + period2XSRA + slope + i(fuelmodel) | street25Xfire^treatmentgroup, data = regs)

logitfits$logit_str1 <- feglm(destroyed ~ period1XNoCodes + period1XLRA + period1XSRA + period2XNoCodes + period2XLRA + period2XSRA + slope + i(fuelmodel) | street25Xfire^treatmentgroup, data = regs, family = "logit")

etable(logitfits) # Note that logit explicitly drops a bunch of singletons, whereas LPM keeps them. This is why the LPM has more observations.

# Get GOF measures for logit
mean(logitfits$logit_str1$y) # 0.4833583
logitfits$logit_str1$pseudo_r2 # 0.0924

# Compute average marginal effects
mfx <- avg_slopes(logitfits$logit_str1, variables = c("period1XNoCodes", "period1XLRA", "period1XSRA", "period2XNoCodes", "period2XLRA", "period2XSRA", "slope"))
summary(mfx)

#### Make table combining LPM and Logit estimates

specs_tex <- modelsummary(c(fits_specs, list(mfx)),
             coef_map = c(
               "period1XSRA" = "1998--2007 $\\times$ SRA",
               "period::1:treatmentgroup::SRA" = "1998--2007 $\\times$ SRA",
               "treatmentgroup::SRA:period::1" = "1998--2007 $\\times$ SRA",
               "period2XSRA" = "2008--2016 $\\times$ SRA",
               "period::2:treatmentgroup::SRA" = "2008--2016 $\\times$ SRA",
               "treatmentgroup::SRA:period::2" = "2008--2016 $\\times$ SRA",
               "period1XLRA" = "1998--2007 $\\times$ LRA-VHFHSZ",
               "period::1:treatmentgroup::LRA-VHFHSZ" = "1998--2007 $\\times$ LRA-VHFHSZ",
               "treatmentgroup::LRA-VHFHSZ:period::1" = "1998--2007 $\\times$ LRA-VHFHSZ",
               "period2XLRA" = "2008--2016 $\\times$ LRA-VHFHSZ",
               "period::2:treatmentgroup::LRA-VHFHSZ" = "2008--2016 $\\times$ LRA-VHFHSZ",
               "treatmentgroup::LRA-VHFHSZ:period::2" = "2008--2016 $\\times$ LRA-VHFHSZ",
               "period1XNoCodes" = "1998--2007 $\\times$ No-codes",
               "period::1:treatmentgroup::No-codes" = "1998--2007 $\\times$ No-codes",
               "period2XNoCodes" = "2008--2016 $\\times$ No-codes",
               "period::2:treatmentgroup::No-codes" = "2008--2016 $\\times$ No-codes",
               "period::1" = "1998--2007",
               "period::2" = "2008--2016",
               "slope" = "Ground slope (degrees)"),
             stars = c("***"=0.01, "**"=0.05, "*"=0.10),
             escape = F, 
             gof_map = c("nobs", "r.squared", "my"),
             output = "latex_tabular")

# We report # of obs manually so note these
print(specs_tex)
# Num.Obs. & \num{45097} & \num{45097} & \num{36374} & \num{45097} & \num{21810}\\

# Split this up
specs_tex_split <- strsplit(as.character(specs_tex), "\\n")[[1]]

# Get indices of rows between midrules
midrule_indices <- grep("midrule", specs_tex_split)
specs_tex_inner <- specs_tex_split[-c(1:midrule_indices[1], midrule_indices[2]:length(specs_tex_split))]

# Save only inner numbers (formatting is mostly in the .tex document)
specs_tex_inner %>% write("results/appxregs-specs-inner.tex")
             

# BALANCE -----

# BALANCE: STANDARDIZED MEAN DIFFERENCES OF RESIDUALIZED COVARS ACROSS TREATMENT GROUPS ------

dmn_df <- demean(slope + LotSizeAcres + LotSizeAcresW + sqfeet1000 + TotalBedrooms + elev + wildfire_hazard_pct ~ street25Xfire, data = regs, na.rm = F)
names(dmn_df) <- paste0(names(dmn_df), "_dmn")
ndiffs <- cbind(regs, dmn_df)

ndiffs <- ndiffs %>% 
  filter(complete.cases(slope, LotSizeAcres, sqfeet1000, TotalBedrooms, elev, wildfire_hazard_pct))

# Recode period
ndiffs <- ndiffs %>% 
  mutate(period_char = case_when(
    period == 0 ~ "Pre-1998",
    period == 1 ~ "1998--2007",
    period == 2 ~ "2008--2016"))
    
# Get SMDs for each dataset
get_SMDs_for_data <- function(data, vars) {
  get_SMDs(x_vars = vars,
           data = data,
           cat = data$period_char,
           ref = "Pre-1998") %>%
    bind_rows(.id = "var")
}

vars_for_smds <- c("slope", "LotSizeAcres", "sqfeet1000", "TotalBedrooms", "elev", "wildfire_hazard_pct")

smds <- lapply(list("SRA" = ndiffs %>% filter(treatmentgroup == "SRA"),
                    "LRA-VHFHSZ" = ndiffs %>% filter(treatmentgroup == "LRA-VHFHSZ"),
                    "No-codes" = ndiffs %>% filter(treatmentgroup == "No-codes")),
                    get_SMDs_for_data, vars = vars_for_smds) %>%
  bind_rows(.id = "Jurisdiction") %>%
  pivot_longer(cols = -c(Jurisdiction, var), names_to = "Vintage", values_to = "SMD")


vars_for_smds_dmn <- paste0(vars_for_smds, "_dmn")
smds_dmn <- lapply(list("SRA" = ndiffs %>% filter(treatmentgroup == "SRA"),
                    "LRA-VHFHSZ" = ndiffs %>% filter(treatmentgroup == "LRA-VHFHSZ"),
                    "No-codes" = ndiffs %>% filter(treatmentgroup == "No-codes")),
               get_SMDs_for_data, vars = vars_for_smds_dmn) %>%
  bind_rows(.id = "Jurisdiction") %>%
  pivot_longer(cols = -c(Jurisdiction, var), names_to = "Vintage", values_to = "SMD")

# Reorganize so we have variables in columns and SRA/LRA in rows
smds_wide <- smds %>%
  pivot_wider(names_from = var, values_from = SMD) 

names(smds_wide) <- c("Jurisdiction", "Vintage", "Slope", "Lot size", "Sqft", "Bedrooms", "Elevation", "Hazard")

smds_dmn_wide <- smds_dmn %>%
  pivot_wider(names_from = var, values_from = SMD)

names(smds_dmn_wide) <- c("Jurisdiction", "Vintage", "Slope", "Lot size", "Sqft", "Bedrooms", "Elevation", "Hazard")

# Make a table for paper (demeaned only)
datasummary_df(smds_dmn_wide,
               escape = F, 
               output = "latex_tabular") %>%
  writeLines(file.path(RES, "balance-smds-dmn.tex"))



## Figures showing how many observations / streets / fires are used for identification with different FEs ----

get_resid_stats <- function(feset, lhs = "afterCode", this_data) {
  # this_data <- regs
  # feset <- "street25Xfire"
  # lhs <- "afterCode"
  
  # Demean by given FE set
  this_data$var_dmn <- demean(as.formula(sprintf("%s ~ %s", lhs, feset)), 
                              data = this_data, na.rm = F)[, 1]
  
  # Return observations with some variation within group (i.e. are nonzero), SD of demeaned variable, and number of fires with variation
  data.frame(
    N_nonzero = sum(this_data$var_dmn != 0, na.rm = T),
    stdev = sd(this_data$var_dmn, na.rm = T),
    numstreets = n_distinct(this_data$streetXfire[this_data$var_dmn != 0]),
    numsubstreets = n_distinct(this_data$street25Xfire[this_data$var_dmn != 0]),
    numfires = n_distinct(this_data$incidentid[this_data$var_dmn != 0])
  )
  
  
}

felist <- c("incidentid", "streetXfire", "street25Xfire")
names(felist) <- c("Fire", "Street", "Sub-street")

afterCode_plot <- lapply(felist, get_resid_stats, this_data = regs) %>% 
  bind_rows(.id = "feset") %>%
  mutate(plotorder = n())



# Plot the number of cells with remaining observations
p <- ggplot(afterCode_plot, aes(x = feset, y = N_nonzero, fill = feset)) +
  geom_col() +
  scale_y_continuous(name = "Observations contributing to identification", labels = comma) +
  scale_x_discrete(name = "Fixed Effects", labels = wrap_format(20)) +
  scale_fill_viridis_d() +
  labs(x = NULL) +
  theme_cowplot() +
  theme(legend.position = "none")

save_plot(filename = file.path(RES, "residual_obs.pdf"), plot = p, base_height = 4.5)

# Plot the number of incidents with remaining observations
p <- ggplot(afterCode_plot, aes(x = feset, y = numfires, fill = feset)) +
  geom_col() +
  scale_y_continuous(name = "Incidents contributing to identification", labels = comma) +
  scale_x_discrete(name = "Fixed Effects", labels = wrap_format(20)) +
  scale_fill_viridis_d() +
  labs(x = NULL) +
  theme_cowplot() +
  theme(legend.position = "none")

save_plot(filename = file.path(RES, "residual_fires.pdf"), plot = p, base_height = 4.5)

## Compare streets w/ and w/out variation ----

regs$afterCode_dmn_street25 <- demean(as.formula("afterCode ~ street25Xfire"), data = regs, na.rm = T)[, 1]

substreetvar_bal <- regs %>%
  group_by(street25Xfire) %>%
  mutate(has_substreet_variation = any(afterCode_dmn_street25 != 0)) %>%
  ungroup() %>%
  transmute(has_substreet_variation, 
            destroyed = as.numeric(destroyed) * 100, 
            slope, 
            elev, 
            wildfire_hazard_pct,
            LotSizeAcres, 
            sqfeet1000, 
            TotalBedrooms, 
            incidentid) # change incidentid name to force clustering by datasummary_balance (not sure if this is actually working, though)

# Get t stats of differences
summ_diffs <- feols(c(destroyed, slope, elev, wildfire_hazard_pct, LotSizeAcres, sqfeet1000, TotalBedrooms) ~ has_substreet_variation | incidentid, data = substreetvar_bal) %>% summary(cluster = ~incidentid)

bal_df <- datasummary_balance( ~ has_substreet_variation,
                    data = substreetvar_bal, 
                    dinm = F,
                    fmt = fmt_significant(2),
                    output = "data.frame") 

names(bal_df) <- c("Variable", "wo_mean", "wo_sd", "w_mean", "w_sd")


# Combine mean and DF into a single string
bal_df <- bal_df %>%
  mutate(wo = sprintf("%s (%s)", wo_mean, bal_df$wo_sd),
         w = sprintf("%s (%s)", w_mean, bal_df$w_sd))

bal_df <- bal_df %>% 
  mutate(t = fmt_significant(2)(coeftable(summ_diffs)$`t value`))

# Clean up names
bal_df <- bal_df %>%
  transmute(
    Variable = case_when(
      Variable == "destroyed" ~ "Destroyed (\\%)",
      Variable == "slope" ~ "Ground slope",
      Variable == "elev" ~ "Elevation (m)",
      Variable == "wildfire_hazard_pct" ~ "Wildfire hazard (\\%)",
      Variable == "LotSizeAcres" ~ "Lot size (acres)",
      Variable == "sqfeet1000" ~ "Square feet (1000s)",
      Variable == "TotalBedrooms" ~ "Bedrooms"
    ),
    `Without Variation` = wo,
    `With Variation` = w,
    `t-statistic` = t
  )

# Write to latex
datasummary_df(bal_df,
               align = "lccc",
               escape = F, 
               output = "latex_tabular") %>%
  writeLines(file.path(RES, "substreetvar_balance.tex"))





# NEIGHBOR REGRESSIONS ----

### --- Set up the neighbor regression data -----

nregs <- regs %>%
  filter(regime %in% c("sra", "lraYY", "lraNY", "lraYN")) %>%
  mutate(subsample = total_neighbors_in_200m_centroids >= 10 & fireyear >= 2013) # Define subsample with smallest location error


### Figures: Neighbor regressions -----

### Estimations ----
nbor_fits <- list()

# Set up formulas
nbor_preferred <-  xpd(destroyed ~ ..("^any_Dwalls[[0-9]]*") + slope | fouryearbin + street25Xfire^treatmentgroup + fuelmodel)

# Preferred Substreet in the subsample
nbor_fits$substreet <- feols(nbor_preferred, data = nregs %>% filter(subsample == TRUE))

# Within 50m of perimeter
nbor_fits$perim <- feols(nbor_preferred, data = nregs %>% filter(subsample == TRUE))

# Destructive fires
nbor_fits$atrisk_gt1000 <- feols(nbor_preferred, data = nregs %>% filter(subsample == TRUE, inc_atrisk_gt1000 == TRUE))

# Centroids
nbor_fits$centroids<- feols(xpd(destroyed ~ fouryearbin + ..("^n_Dcentroids[[0-9]]*") + slope | street25Xfire^treatmentgroup + fuelmodel), data = nregs)


etable(nbor_fits, keep = "preTRUE", cluster = "incidentid")
etable(nbor_fits, keep = "postTRUE")
# Preview estimates
coefplot(nbor_fits, keep = c("pre"), cluster = "incidentid")
coefplot(nbor_fits, keep = c("post"), cluster = "incidentid")



### Figure: main neighbor regression plot -----

x_display = c(5, 15, 25, 35, 45, 55, 65, 75, 85, 95)
x_labels = c("0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100")

get_neighbor_coefs <- function(fit) {
  # Given a fitted model, return the coefficients in a tidy format ready for plotting
  # fit <- nbor_fits$substreet
  # x_display <- c(5, 15, 25, 35, 45, 55, 65, 75, 85, 95)
  # x_labels <- c("0-10", "10-20", "20-30", "30-40", "40-50", "50-60", "60-70", "70-80", "80-90", "90-100")
  # END DEBUG
  
  precode_df <- coefplot(fit,
                keep = c("_pre"),
                cluster = "incidentid")$prms %>%
    transmute(estimate, ci_low, ci_high, x, y, tocode = "Pre-code Neighbors")
  
  postcode_df <- coefplot(fit,
                keep = c("_post"),
                cluster = "incidentid")$prms %>%
    transmute(estimate, ci_low, ci_high, x, y, tocode = "Post-code Neighbors")
  
  df <- precode_df %>% 
    bind_rows(postcode_df) %>%
    mutate(x_display = x_display[x]) %>%
    mutate(tocode = factor(tocode, levels = c("Pre-code Neighbors", "Post-code Neighbors"))) %>%
    group_by(tocode) %>%
    arrange(tocode, x)
  
  df
}

main_neighbor_coefs <- get_neighbor_coefs(nbor_fits$substreet)

nbor_main <- ggplot(data = main_neighbor_coefs, aes(x = x_display, y = y)) +
  geom_hline(yintercept = 0, color = "black") +
  geom_errorbar(aes(ymax = ci_high, ymin = ci_low), color = "gray80", width = 1, linewidth = 1, linetype = "solid") +
  geom_point(aes(color = tocode, shape = tocode), size = 4) +
  globalplotoptions +
  theme(
    legend.position = "none",
    panel.spacing = unit(2, "lines"),
    strip.background = element_blank()
  ) +
  scale_color_manual(values = c("red", "blue")) +
  scale_shape_manual(values = c(19, 19)) +
  scale_x_continuous(
    name = "Distance to Neighbor (meters)",
    breaks = x_display, 
    labels = x_labels,
    lim = c(0, 100)
  ) +
  scale_y_continuous(name = "Change in Destruction Probability", labels = percent, lim = c(-0.05, 0.05), breaks = c(-0.025, 0, 0.025)) +
  facet_wrap(vars(tocode), nrow = 2, scales = "free")

nbor_main

save_plot(file.path(RES, "nborregs.pdf"), plot = nbor_main, base_height = 8, base_asp = 1.1)

# Figure: Sensitivity of Neighbor spillover regressions ----

all_neighbor_coefs <- lapply(nbor_fits, get_neighbor_coefs) %>%
  bind_rows(.id = "model")

# Label and jitter by group
all_neighbor_coefs <- all_neighbor_coefs %>%
  mutate(x_display = case_when(
    model == "substreet" ~ x_display - 2.25,
    model == "perim" ~ x_display - 0.75,
    model == "atrisk_gt1000" ~ x_display + 0.75,
    model == "centroids" ~ x_display + 2.25
  )) %>%
  mutate(model = factor(model, levels = c("substreet", "perim", "atrisk_gt1000", "centroids"),
                        labels = c("Sub-street", "50m+ From Perimeter", "1000+ At-Risk", "Centroids")))

nbor_all <- ggplot(data = all_neighbor_coefs, aes(x_display, y, shape = model)) +
  geom_hline(yintercept = 0, color = "black") +
  geom_errorbar(aes(ymax = ci_high, ymin = ci_low), color = "gray80", width = 1, linewidth = 0.5, linetype = "solid") +
  geom_point(aes(color = tocode), size = 2) +
  globalplotoptions +
  theme(
    panel.spacing = unit(1, "lines"),
    strip.background = element_blank()
  ) +
  scale_shape_manual(name = NULL, values = c(15, 16, 17, 18)) +
  scale_color_manual(name = NULL, values = c("red", "blue"), guide = "none") + 
  scale_x_continuous(
    name = "Distance to Neighbor (meters)",
    breaks = x_display, 
    labels = x_labels,
    lim = c(0, 100)
  ) +
  scale_y_continuous(name = "Change in Destruction Probability", labels = percent, lim = c(-0.05, 0.05), breaks = c(-0.025, 0, 0.025)) +
  facet_wrap(vars(tocode), nrow = 2, scales = "free")

save_plot(file.path(RES, "nborregs-sensitivity.pdf"), plot = nbor_all, base_height = 6, base_asp = 1.6)


# Table: # of neighbors in nearest bin -----

estimate_nbor <- function(this_data, postcode_var = "n_Dwalls0_post", precode_var = "n_Dwalls0_pre", fe = "street25Xfire^treatmentgroup") {
  # Define easy to use variables for nearby neighbor counts

  this_data <- this_data %>%
    mutate(
      precode = get(precode_var),
      postcode = get(postcode_var),
      precode1 = get(precode_var) == 1,
      postcode1 = get(postcode_var) == 1,
      precode2 = get(precode_var) == 2,
      postcode2 = get(postcode_var) == 2,
    )


  fmla1 <- as.formula(sprintf("destroyed ~ precode + postcode + slope | fouryearbin + fuelmodel + %s", fe)) # Estimate model with counts
  fmla2 <- as.formula(sprintf("destroyed ~ precode1 + precode2 + postcode1 + postcode2 + slope | fouryearbin + fuelmodel + %s", fe)) # estimate model with binary indicators
  
  fit1 <- feols(fmla1, data = this_data)
  fit2 <- feols(fmla2, data = this_data)
  return(list(fit1, fit2))
}

fits_nbor <- list()

fits_nbor[["Walls"]] <- estimate_nbor(nregs %>% filter(subsample == TRUE), 
                                              precode_var = "n_Dwalls0_pre", postcode_var = "n_Dwalls0_post")

fits_nbor[["Centroids"]] <- estimate_nbor(nregs, precode_var = "n_Dcentroids0_pre", postcode_var = "n_Dcentroids0_post")

etable(fits_nbor$Walls[[1]], fits_nbor$Centroids[[1]], fits_nbor$Walls[[2]], fits_nbor$Centroids[[2]],
  cluster = "incidentid",
  group = list(
    "Topography" = c("%slope", "%fuelmodel")
  ),
  extralines = list("_^Distances" = c("Walls", "Centroids", "Walls", "Centroids")), 
  tex = T
) %>%
  write(file.path(RES, "nbortable.tex"))

# Appendix Table: Linearity of neighbor effects ----


# fit_nbor_linear <- list()
# fit_nbor_linear[["Walls"]] <- estimate_nbor_linear(nregs %>% filter(subsample == TRUE), 
#                                                    precode_var = "n_Dwalls0_pre", postcode_var = "n_Dwalls0_post")
# 
# fit_nbor_linear[["Centroids"]] <- estimate_nbor_linear(nregs, precode_var = "n_Dcentroids0_pre", postcode_var = "n_Dcentroids0_post")
# 
# 
# etable(fit_nbor_linear,
#        cluster = "incidentid",
#        group = list(
#          "Topography" = c("%slope", "%fuelmodel")
#        ),
#        headers = list("^" = names(fit_nbor_linear)),
#        tex = T
# ) %>%
#   write(file.path(RES, "nbortable-linearity.tex"))


####### APPX ROBUSTNESS: SUTVA
## This table focuses specifically on SUTVA issues (pre-code homes benefiting from to-code neighbors, attenuating results). It controls directly for number of treated and untreated neighbors,
## and thus needs to use a restricted sample where those things are known.

estimate_nbor_sutva <- function(this_data, precode_var = "n_Dwalls0_pre", postcode_var = "n_Dwalls0_post", fe = "street25Xfire^treatmentgroup") {
  # this_data <- nregs %>% filter(regime != "lraNN")
  # END DEBUG
  
  this_data <- this_data %>%
    mutate(
      precode = get(precode_var),
      postcode = get(postcode_var)
    )
  
  fit1 <- feols(as.formula(sprintf("destroyed ~ i(period, treatmentgroup, ref = 0) + slope | fuelmodel + %s", fe)),
               data = this_data)
  fit2 <- feols(as.formula(sprintf("destroyed ~ i(period, treatmentgroup, ref = 0) + slope + precode + postcode | fuelmodel + %s", fe)),
                data = this_data)
  
  
  list(fit1, fit2)
}

sutva <- nregs %>% filter(regime != "lraNN")

rmodels <- list()

rmodels[["Walls"]] <- estimate_nbor_sutva(sutva %>% filter(subsample, complete.cases(n_Dwalls0_pre, n_Dwalls0_post)), 
                                          precode_var = "n_Dwalls0_pre", postcode_var = "n_Dwalls0_post")
rmodels[["Centroids"]] <- estimate_nbor_sutva(sutva %>% filter(complete.cases(n_Dcentroids0_pre, n_Dcentroids0_post)), 
                                                               precode_var = "n_Dcentroids0_pre", postcode_var = "n_Dcentroids0_post")


etable(c(rmodels$Walls, rmodels$Centroids),
  cluster = "incidentid",
  depvar = F,
  headers = list("^" = list("Walls" = 2, "Centroids" = 2)),
  order = c("SRA", "LRA", "1998--2007", "2007--2016", "Ground slope", "pre-code", "post-code"),
  dict = dict_names,
  tex = T
) %>%
  writeLines(file.path(RES, "sutva-check.tex"))

# Aggregate into neighborhoods ----

aggregate_data <- function(data, ...) {
  agg_data <- data %>%
    group_by(...) %>%
    summarize(
      destroyed_prop = sum(destroyed) / sum(!is.na(destroyed)),
      period0 = sum(period == 0),
      period1 = sum(period == 1),
      period2 = sum(period == 2),
      sra = sum(treatmentgroup == "SRA"),
      lrav = sum(treatmentgroup == "LRA-VHFHSZ"),
      nocodes = sum(treatmentgroup == "No-codes"),
      slope = mean(slope, na.rm = T),
      wildfire_hazard_pct = mean(wildfire_hazard_pct, na.rm = T), # Use as a coarse substitute for fuel model
      n = n()
    ) %>%
    ungroup()

  # Create proportions
  agg_data <- agg_data %>%
    mutate(
      period0_prop = period0 / (period0 + period1 + period2),
      period1_prop = period1 / (period0 + period1 + period2),
      period2_prop = period2 / (period0 + period1 + period2),
      sra_prop = sra / (sra + lrav + nocodes),
      lrav_prop = lrav / (sra + lrav + nocodes),
      nocodes_prop = nocodes / (sra + lrav + nocodes),
      period12_prop = (period1 + period2) / (period0 + period1 + period2),
      codes_prop = (sra + lrav) / (sra + lrav + nocodes)
    )

  # A handful of streets have both SRA/LRA and No-codes homes.

  # Create manual interactions
  agg_data <- agg_data %>%
    mutate(
      period1_sra_prop = period1_prop * sra_prop,
      period2_sra_prop = period2_prop * sra_prop,
      period1_lrav_prop = period1_prop * lrav_prop,
      period2_lrav_prop = period2_prop * lrav_prop,
      period1_nocodes_prop = period1_prop * nocodes_prop,
      period2_nocodes_prop = period2_prop * nocodes_prop,
      postcodeXcoderequired = period12_prop * codes_prop,
      postcodeXnocodes = period12_prop * nocodes_prop,
      postcodeXcoderequiredXabove = as.numeric(postcodeXcoderequired > 0.5) * postcodeXcoderequired
    )

  agg_data
}

regs_blocks <- regs %>% aggregate_data(block, incidentid)
regs_streets <- regs %>% aggregate_data(streetXfire, incidentid)
regs_substreets <- regs %>% aggregate_data(street25Xfire, streetXfire, incidentid)
regs_streetXblocks <- regs %>% aggregate_data(block, streetXfire, incidentid)

# Estimate models
# We are underpowered with this design, so we also estimate a "simple" model that pools To-code areas and periods.

fmla_simp <- as.formula("destroyed_prop ~ postcodeXcoderequired + postcodeXnocodes + slope | incidentid")

fits_street <- list()

# First, run a comparable regression on our full home dataset
regs <- regs %>%
  mutate(coderequired = treatmentgroup %in% c("SRA", "LRA-VHFHSZ"),
         postcodeXcoderequired = period %in% c(1, 2) & coderequired,
         postcodeXnocodes = period %in% c(1, 2) & !coderequired)
         
fits_street[["Homes"]] <- feols(destroyed ~ postcodeXcoderequired + postcodeXnocodes + slope | incidentid, data = regs)

# Census blocks
fits_street[["Census\nBlocks"]] <- feols(fmla_simp, data = regs_blocks, weights = ~n) %>%
  summary(cluster = ~incidentid)

# streets
fits_street[["Street"]] <- feols(fmla_simp, data = regs_streets, weights = ~n) %>%
  summary(cluster = ~incidentid)

# 25 home streets
fits_street[["Sub-street"]] <- feols(fmla_simp, data = regs_substreets, weights = ~n) %>%
  summary(cluster = ~incidentid)


# Street X block
fits_street[["Street\n$\\times$ Block"]] <- feols(fmla_simp, data = regs_streetXblocks, weights = ~n) %>%
  summary(cluster = ~incidentid)


setFixest_dict(dict = c(
  "destroyed_prop" = "Destroyed",
  "postcodeXcoderequiredTRUE" =  "1998--2016 $\\times$ Code-required",
  "postcodeXcoderequired" = "1998--2016 $\\times$ Code-required",
  "postcodeXnocodesTRUE" = "1998--2016 $\\times$ No-codes",
  "postcodeXnocodes" = "1998--2016 $\\times$ No-codes",
  "postcodeXcoderequiredXabove" = "1998--2016 $\\times$ Code-required $\\times >$ 50\\%",
  "slope" = "Ground slope"
))

etable(fits_street)

etable(fits_street,
  depvar = F,
  headers = list("^" = names(fits_street)),
  keep = c("1998", "slope"),
  order = "Prop.",
  tex = T
) %>%
  write(file.path(RES, "street-regs.tex"))

